#!/usr/bin/env groovy
package scripts

/**
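 * This script extracts the author names stored in the "Authors" custom
 * metadata field of PDF documents and tallies them in a FrequencyMap.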
 */

@Grapes([
@Grab(group='org.apache.ivy', module='ivy', version='2.2.0'),
@Grab(group='org.apache.pdfbox', module='pdfbox', version='1.7.1'),
@Grab(group='org.biogroovy', module='biogroovy',version='1.1')
])
import org.apache.pdfbox.pdmodel.common.*;
import org.apache.pdfbox.pdmodel.*;
import org.biogroovy.models.*;
import org.biogroovy.util.FrequencyMap;
import org.apache.commons.cli.Option;

// Sample values left over from development; the visible script never reads them.
String file1 = "/Users/markfortner/Downloads/1471-2105-10-73.pdf"
// Written as a string literal so the value matches the declared String type
// instead of relying on Groovy's implicit int-to-String coercion.
String pmid = "19245720";
	
// Command-line definition: -h prints usage, -f takes one or more PDF paths.
def cli = new CliBuilder(usage: 'PubMedAuthors.groovy -f <inputFileList>');

cli.with {
	h( longOpt: 'help', 'PubMedAuthors.groovy -f <inputFileList>')
	f( longOpt: 'inputFiles', args: Option.UNLIMITED_VALUES, argName:'files','A space-delimited list of PDF files to be processed')
}


def options = cli.parse(args)

// parse() yields a falsy result when the arguments could not be parsed;
// there is nothing sensible to do in that case, so stop here.
if (!options){
	return;
}

// A help request is terminal: show the usage text and stop, rather than
// falling through and attempting to process PDF files.
if (options.h){
	cli.usage();
	return;
}


// The -f option accepts an unlimited number of values. The plural accessor
// (options.fs) yields all of them, whereas options.f yields only the first.
// When -f was not supplied the accessor is falsy.
def inputFiles = options.fs
if (!inputFiles) {
	// Without input files there is nothing to process; explain and stop
	// instead of failing inside PDDocument.load with an obscure error.
	cli.usage();
	return;
}

// Accumulates the author names found across every input PDF.
FrequencyMap map = new FrequencyMap();

for (String inputFile : inputFiles) {
	PDDocument document = PDDocument.load(inputFile);
	try {
		PDDocumentInformation info = document.getDocumentInformation();
		// The custom "Authors" field is optional metadata, so it may be missing.
		String authors = info.getCustomMetadataValue("Authors");
		if (authors) {
			// The field is expected to hold a semicolon-delimited author list.
			String[] authorList = authors.split(";");
			map.addAll(authorList);
		} else {
			System.err.println("No 'Authors' metadata found in ${inputFile}; skipping.");
		}
	} finally {
		// Always release the file handle and buffers held by the document.
		document.close();
	}
}

// Emit the tally so the script produces an observable result.
println map


